## Jesse Gubb
## Rakeen Mabud
## Gov2001
## 4/30/12

##############################################################
## Replication R Code:
## Cheap Talk and Party Organizations in State Legislatures
##############################################################

library(foreign)
library(car)
library(systemfit)
library(Zelig)
data<-read.dta("gubbmabud.dta")
#head(data)


#################################
## Table 1, WLS, district bills:
#################################


special.lm<-lm(special_pct ~ salary_perincome + Biennium + Turnover_Never_pct +majority_margin + District_residents + biggest_pct_pop + home_broad + income1000 + alabama + illinois + massachusetts + michigan + minnesota + montana + nebraska + newyork + texas + vermont + virginia + washington + year_1900 + year_1920 + year_1940 + year_1960 + year_1980 + year_1997, weights=bills_intro, data=data)
summary(special.lm)

# with non-partisan legislatures included as "one-party" using "majority_margin_nonpart" variable.
specnonp.lm<-lm(special_pct ~ salary_perincome + Biennium + Turnover_Never_pct +majority_margin_nonpart + District_residents + biggest_pct_pop + home_broad + income1000 + alabama + illinois + massachusetts + michigan + minnesota + montana + nebraska + newyork + texas + vermont + virginia + washington + year_1900 + year_1920 + year_1940 + year_1960 + year_1980 + year_1997, weights=bills_intro, data=data)
summary(specnonp.lm)

####################################
## Table 2, WLS, Statewide bills:
####################################

statewide.lm<-lm(statewide_pct ~ salary_perincome + Biennium + Turnover_Never_pct +majority_margin + District_residents + biggest_pct_pop + home_broad + income_constant + alabama + illinois + massachusetts + michigan + minnesota + montana + nebraska + newyork + texas + vermont + virginia + washington + year_1900 + year_1920 + year_1940 + year_1960 + year_1980 + year_1997, weights=bills_intro, data=data)
summary(statewide.lm)

# with non-partisan legislatures included:
statenonp.lm<-lm(statewide_pct ~ salary_perincome + Biennium + Turnover_Never_pct +majority_margin_nonpart + District_residents + biggest_pct_pop + home_broad + income_constant + alabama + illinois + massachusetts + michigan + minnesota + montana + nebraska + newyork + texas + vermont + virginia + washington + year_1900 + year_1920 + year_1940 + year_1960 + year_1980 + year_1997, weights=bills_intro, data=data)
summary(statenonp.lm)

######################
## Multiple Imputation
######################
# not shown in paper, but gives out of bounds values for missing data, reducing statistical significance of results.
vars <- c(4:8,17,19,25,26,30,35,43,57,65:85,98,119) ## analysis variables plus a few others for imputation
adata<-data[,vars] # subset data for imputation
adata<-adata[,-5]
adatap<-cbind(adata,data$partyorg) # add partyorg to adatap for separate imputation.

#install.packages("Amelia")
library(Amelia)
bds<-matrix(c(5,0,100), nrow=1, ncol=3) ## bounds matrix for maj margin
set.seed(123)
a.out<-amelia(x=adata, m=10, max.resample=100000, idvars=c(14:33)) ## we get out of bounds answers with no bounds. add bounds=bds if you want to see correction, which gives only 0s and 100s by failing to resample with in bounds values.
write.amelia(obj=a.out, file.stem = "outdata")
summary(a.out)
missmap(a.out)
plot(a.out)

## if you want to see the imputed data:
outdata1<-read.table("outdata1.csv", sep=",", header=T)
outdata2<-read.table("outdata2.csv", sep=",", header=T)
outdata3<-read.table("outdata3.csv", sep=",", header=T)
outdata4<-read.table("outdata4.csv", sep=",", header=T)
outdata5<-read.table("outdata5.csv", sep=",", header=T)
outdata6<-read.table("outdata6.csv", sep=",", header=T)
outdata7<-read.table("outdata7.csv", sep=",", header=T)
outdata8<-read.table("outdata8.csv", sep=",", header=T)
outdata9<-read.table("outdata9.csv", sep=",", header=T)
outdata10<-read.table("outdata10.csv", sep=",", header=T)
outdata1$majority_margin # change number to see more datasets

##impute with party org
set.seed(123)
a.out.p<-amelia(x=adatap, m=5, idvars=c(12:31)) 

z.out.imp<-zelig(special_pct ~ majority_margin + salary_perincome + Biennium + Turnover_Never_pct + District_residents + biggest_pct_pop + home_broad + income_constant + alabama + illinois + massachusetts + michigan + minnesota + montana + nebraska + newyork + texas + vermont + virginia + washington + year_1900 + year_1920 + year_1940 + year_1960 + year_1980 + year_1997, model="ls", weights=data$bills_intro, data=a.out$imputations)
summary(z.out.imp) ## their original model with imputed data based on their covraiates - not signficant anymore

######################
## Sensitivity Testing
######################

## significance goes away because of bad imputed values. Use the code below to change majority margin for the NA legislatures to any value. Any value within 0-100 retains significance. Values outside do not.
mm.sens<-data$majority_margin
mm.sens[is.na(mm.sens)] <- 120 ## set value of NAs.
data<-cbind(data,mm.sens)
out.sens<-zelig(statewide_pct ~ mm.sens + salary_perincome + Biennium + Turnover_Never_pct + District_residents + biggest_pct_pop + home_broad + income_constant + alabama + illinois + massachusetts + michigan + minnesota + montana + nebraska + newyork + texas + vermont + virginia + washington + year_1900 + year_1920 + year_1940 + year_1960 + year_1980 + year_1997, model="ls", weights=data$bills_intro, data=data)
summary(out.sens)




###########################################
## Party Organizations and Bills Introduced
###########################################

### Descriptive Statistics ###
mean(data$bills_intro)
var(data$bills_intro) # overdispersed
mean(data$bpm)
summary(data$bpm)
summary(data$bills_intro)


###########
## Figure 1
###########
plot(y=data$bpm, x=data$billyear, cex=.001, xlab="Year", ylab="Bills Per Member")
jitter(text(data$billyear, data$bpm, labels = as.character(data$state),cex = .9))



##########
## Table 3
##########   


    ## bpm models ##

bpm.out<-zelig(bpm ~ majority_margin+partyorg + salary_perincome + Biennium + Turnover_Never_pct + District_residents + biggest_pct_pop + home_broad + income1000 + alabama + illinois + massachusetts + michigan + minnesota + montana + nebraska + newyork + texas + vermont + virginia + washington + year_1900 + year_1920 + year_1940 + year_1960 + year_1980 + year_1997, data=data, model="ls")
summary(bpm.out) ## partyorg (-), salary (-), res (+), city (-) 
bpm.int.out<-zelig(bpm ~ majority_margin*partyorg + salary_perincome + Biennium + Turnover_Never_pct + District_residents + biggest_pct_pop + home_broad + income1000 + alabama + illinois + massachusetts + michigan + minnesota + montana + nebraska + newyork + texas + vermont + virginia + washington + year_1900 + year_1920 + year_1940 + year_1960 + year_1980 + year_1997, data=data, model="ls")
summary(bpm.int.out) ## salary (-), res (+), city (-), partyorg (-)

bpm.nofix.out<-zelig(bpm ~ majority_margin+partyorg + salary_perincome + Biennium + Turnover_Never_pct + District_residents + biggest_pct_pop + home_broad + income1000 + year_1900 + year_1920 + year_1940 + year_1960 + year_1980 + year_1997, data=data, model="ls")
summary(bpm.nofix.out) ##maj margin (+), district res (+), income per cap ++ effect

bpm.intnofx.out<-zelig(bpm ~ majority_margin*partyorg + salary_perincome + Biennium + Turnover_Never_pct + District_residents + biggest_pct_pop + home_broad + income1000 + year_1900 + year_1920 + year_1940 + year_1960 + year_1980 + year_1997, data=data, model="ls")
summary(bpm.intnofx.out)  ## maj margin, income per cap, and district res + effect

library(apsrtable)
apsrtable(bpm.out, bpm.int.out, bpm.nofix.out, bpm.intnofx.out)
# # # # w/ fix effect, party org decreases numbrer of bills introduced as expected. surprisingly, salary decreases the number of bills introduced. (residents increase - more people to reach, big city decreases, more people reached by one bill maybe). Without fixed effects, majority margin becomes an important predictor of increased cheap talk - fits the story of one-party devolving into factionalism. without an opposition party, party labels are less meaningful substitute for personal accomplishments. 

    ## bills intro models ##  (not in paper, but discussed)

intro.out<-zelig(bills_intro ~ majority_margin+partyorg +salary_perincome + Biennium + Turnover_Never_pct + District_residents + biggest_pct_pop + home_broad + income1000 + alabama + illinois + massachusetts + michigan + minnesota + montana + nebraska + newyork + texas + vermont + virginia + washington + year_1900 + year_1920 + year_1940 + year_1960 + year_1980 + year_1997, data=data, model="negbin")
summary(intro.out) ## res (+), city(-), homerule (+)

intro.int.out<-zelig(bills_intro ~ majority_margin*partyorg +salary_perincome + Biennium + Turnover_Never_pct + District_residents + biggest_pct_pop + home_broad + income1000 + alabama + illinois + massachusetts + michigan + minnesota + montana + nebraska + newyork + texas + vermont + virginia + washington + year_1900 + year_1920 + year_1940 + year_1960 + year_1980 + year_1997, data=data, model="negbin")
summary(intro.int.out) ## res(+), city(-), interaction (small-)

intro.nofx.out<-zelig(bills_intro ~ majority_margin+partyorg +salary_perincome + Biennium + Turnover_Never_pct + District_residents + biggest_pct_pop + home_broad + income1000 + year_1900 + year_1920 + year_1940 + year_1960 + year_1980 + year_1997, data=data, model="negbin")
summary(intro.nofx.out) ## maj margin (+), res (+)

intro.intnofx.out<-zelig(bills_intro ~ majority_margin*partyorg +salary_perincome + Biennium + Turnover_Never_pct + District_residents + biggest_pct_pop + home_broad + income1000 + year_1900 + year_1920 + year_1940 + year_1960 + year_1980 + year_1997, data=data, model="negbin")
summary(intro.intnofx.out) ## maj margin (+), res(+)

# # # same general results, maj margin sig w/o fixed effects. except no effect for party org (appears on a per member basis only) 


      ##### Discussion of Results #####
      
## First Differences: (we use per member lm models for this analysis)

   # partyorg: (non interactive results)
x.low<-setx(bpm.out, partyorg=0, fn = list(numeric = median, ordered = median, other = mode))
x.high<-setx(bpm.out, partyorg=1, fn = list(numeric = median, ordered = median, other = mode))


#install.packages("WhatIf")
library(WhatIf)
summary(whatif(data = x.low[-1], cfact = x.high[-1]))
## counterfactual not in convex hull.
s.out <- sim(bpm.out, x = x.low, x1 = x.high)
plot(s.out)
summary(s.out) ## -10 bills introduced per member going from median state w/o party org to one with. this first difference is not entirely within the data, however.


## real example first diff: (# zelig didn't work for this)
X<-cbind(1,data$majority_margin, data$partyorg, data$salary_perincome, data$Biennium, data$Turnover_Never_pct, data$District_residents, data$biggest_pct_pop, data$home_broad, data$income1000, data$alabama,data$illinois,data$massachusetts,data$michigan,data$minnesota,data$montana,data$nebraska,data$newyork,data$texas,data$vermont,data$virginia,data$washington,data$year_1900,data$year_1920,data$year_1940,data$year_1960,data$year_1980,data$year_1997)
##colnames(X)<-c("constant","salary_perincome","Biennium","Turnover_Never_pct","majority_margin"," District_residents","biggest_pct_pop","home_broad","income_constant","alabama","illinois"," massachusetts","michigan","minnesota","montana","nebraska","newyork","texas","vermont","virginia","washington","year_1900","year_1920","year_1940","year_1960","year_1980","year_1997", "partyorg") ## wrong now
set.seed(123)
x.ny.po<-X[data$newyork==1 & data$year_1960==1,] #partyorg=1
x.ny<-X[data$newyork==1 & data$year_1980==1,] #partyorg=0
simbetas<-mvrnorm(mu=bpm.out$coef, Sigma=vcov(bpm.out))

y.ny.po<-simbetas%*%x.ny.po
y.ny<-simbetas%*%x.ny
y.ny-y.ny.po
# model predicts additional 32.8 bpm for NY 1960 to 1980 (going from TPO to not)

   # salary:
xsal.low<-setx(bpm.out, salary_perincome=mean(data$salary_perincome)-sd(data$salary_perincome))
xsal.high<-setx(bpm.out, salary_perincome=mean(data$salary_perincome)+sd(data$salary_perincome))
sal.out <- sim(bpm.out, x = xsal.low, x1 = xsal.high)
plot(sal.out)
summary(sal.out)
summary(whatif(data = xsal.low[-1], cfact = xsal.high[-1]))
## decrease of 7.5 bills per member when income goes from 1sd below mean to 1sd above.
#(these are all tentative results given the lack of data where effect is significant)


   # majority margin (pos but insig effect):
x.mm.low<-setx(bpm.out, majority_margin=mean(na.omit(data$majority_margin))-sd(na.omit(data$majority_margin)))
x.mm.high<-setx(bpm.out, majority_margin=mean(na.omit(data$majority_margin))+sd(na.omit(data$majority_margin)))
s.mm.out <- sim(bpm.out, x = x.mm.low, x1 = x.mm.high)
plot(s.mm.out)
summary(s.mm.out)
summary(whatif(data = xsal.low[-1], cfact = xsal.high[-1]))
# insig increase in 1 bill per member as maj margin increases

    # majority margin w/o fixed effects:
x.maj.low<-setx(bpm.nofix.out, majority_margin=mean(na.omit(data$majority_margin))-sd(na.omit(data$majority_margin)))
x.maj.high<-setx(bpm.nofix.out, majority_margin=mean(na.omit(data$majority_margin))+sd(na.omit(data$majority_margin)))    
s.maj.out<-sim(bpm.nofix.out, x = x.maj.low, x1 = x.maj.high)
summary(s.maj.out)
plot(s.maj.out) ## predicts 5.9 bpm increase for one unit increase in maj margin. 


###########
## FIGURE 2
###########

           ## Marginal Effect Plot for Interactive Model ##

xpoints<-seq(from=0, to=100, by=.05)
meff<-coef(bpm.int.out)[3]+ coef(bpm.int.out)[29]*xpoints
se<-sqrt(vcov(bpm.int.out)[3,3]+xpoints*vcov(bpm.int.out)[29,29] - 2*xpoints*vcov(bpm.int.out)[3,29])
lower<-meff-1.96*se
upper<-meff+1.96*se

plot(NA, NA, xlim=c(0,100), ylim=c(-80,25), ylab="Marginal Effect", xlab="Majority Margin")
lines(xpoints, meff)
jitter(rug(data$majority_margin[partyorg==1], ticksize=.04, lwd=1))
jitter(rug(data$majority_margin[partyorg==0], ticksize=-.02)) ## data where partyorg==1
lines(xpoints, lower, lty=2)
lines(xpoints,upper, lty=2)
abline(a=0,b=0, lty=3)



##############################################
################## Appendix ##################
##############################################

# To replicate Gamm and Kousser's (2010) original tables,
# use this code in STATA:

## Table 3, col 1 and 2:
##sureg (special_pct  salary_perincome  Biennium Turnover_Never_pct majority_margin District_residents  biggest_pct_pop  home_broad income_constant alabama illinois massachusetts  michigan minnesota montana nebraska newyork texas vermont virginia washington  year_1900 year_1920  year_1940 year_1960 year_1980 year_1997)  (statewide_pct  salary_perincome  Biennium Turnover_Never_pct majority_margin District_residents  biggest_pct_pop  home_broad income_constant alabama illinois massachusetts  michigan minnesota montana nebraska newyork texas vermont virginia washington  year_1900 year_1920  year_1940 year_1960 year_1980 year_1997) (general_pct  salary_perincome  Biennium Turnover_Never_pct majority_margin District_residents  biggest_pct_pop  home_broad income_constant alabama illinois massachusetts  michigan minnesota montana nebraska newyork texas vermont virginia washington  year_1900 year_1920  year_1940 year_1960 year_1980 year_1997) [weight= bills_intro]

## Table 3, col 3 and 4:
##sureg (special_pct  salary_perincome  Biennium Turnover_Never_pct  majority_margin_nonpart District_residents  biggest_pct_pop  home_broad income_constant alabama illinois massachusetts  michigan minnesota montana nebraska newyork texas vermont virginia washington  year_1900 year_1920 year_1940 year_1960 year_1980 year_1997) (statewide_pct  salary_perincome  Biennium Turnover_Never_pct  majority_margin_nonpart District_residents  biggest_pct_pop  home_broad income_constant alabama illinois massachusetts  michigan minnesota montana nebraska newyork texas vermont virginia washington  year_1900 year_1920  year_1940 year_1960 year_1980 year_1997) (general_pct  salary_perincome  Biennium Turnover_Never_pct  majority_margin_nonpart District_residents  biggest_pct_pop  home_broad income_constant alabama illinois massachusetts  michigan minnesota montana nebraska newyork texas vermont virginia washington  year_1900 year_1920  year_1940 year_1960 year_1980 year_1997) [weight= bills_intro]


###############################
## REPLICATED FIGURES NOT SHOWN
###############################

## We also replicated, but did not include in the paper, two figures from the original paper:

# we replicate a few figures to get a sense of how they present their data.

## FIG 1
## data had to be aggregated manually to get the values seen in the figure. Advice on doing this more elegantly would be appreciated.

fig.data<-cbind(data$statewide_pct, data$general_pct, data$special_pct, data$billyear)
fig.data<-fig.data[order(data$billyear),]
colnames(fig.data)<-c("statewide_pct", "general_pct", "special_pct", "year")
years<-sort(unique(data$billyear))
fig.data

# we calculated means for each year in order to plot the figure. this is almost by hand, very inefficient. suggestions appreciated.
holder<-matrix(NA, ncol=4, nrow=length(years))
holder[1,1]<-mean(fig.data[(1:2),1]) #1880
holder[1,2]<-mean(fig.data[(1:2),2])
holder[1,3]<-mean(fig.data[(1:2),3])
holder[1,4]<-fig.data[1,4]
holder[2,1]<-mean(fig.data[(3:13),1]) #1881
holder[2,2]<-mean(fig.data[(3:13),2])
holder[2,3]<-mean(fig.data[(3:13),3])
holder[2,4]<-fig.data[3,4]
holder[3,1]<-mean(fig.data[(14:15),1]) #1900
holder[3,2]<-mean(fig.data[(14:15),2])
holder[3,3]<-mean(fig.data[(14:15),3])
holder[3,4]<-fig.data[14,4]
holder[4,1]<-mean(fig.data[(16:26),1]) #1901
holder[4,2]<-mean(fig.data[(16:26),2])
holder[4,3]<-mean(fig.data[(16:26),3])
holder[4,4]<-fig.data[16,4]
holder[5,1]<-fig.data[27,1] #1919
holder[5,2]<-fig.data[27,2]
holder[5,3]<-fig.data[27,3]
holder[5,4]<-fig.data[27,4]
holder[6,1]<-fig.data[28,1] #1920
holder[6,2]<-fig.data[28,2]
holder[6,3]<-fig.data[28,3]
holder[6,4]<-fig.data[28,4]
holder[7,1]<-mean(fig.data[(29:39),1]) #1921
holder[7,2]<-mean(fig.data[(29:39),2])
holder[7,3]<-mean(fig.data[(29:39),3])
holder[7,4]<-fig.data[29,4]
holder[8,1]<-fig.data[40,1] #1939
holder[8,2]<-fig.data[40,2]
holder[8,3]<-fig.data[40,3]
holder[8,4]<-fig.data[40,4]
holder[9,1]<-mean(fig.data[(41:51),1]) #1941
holder[9,2]<-mean(fig.data[(41:51),2])
holder[9,3]<-mean(fig.data[(41:51),3])
holder[9,4]<-fig.data[41,4]
holder[10,1]<-fig.data[52,1] #1942
holder[10,2]<-fig.data[52,2]
holder[10,3]<-fig.data[52,3]
holder[10,4]<-fig.data[52,4]
holder[11,1]<-fig.data[53,1] #1960
holder[11,2]<-fig.data[53,2]
holder[11,3]<-fig.data[53,3]
holder[11,4]<-fig.data[53,4]
holder[12,1]<-mean(fig.data[(54:65),1]) #1961
holder[12,2]<-mean(fig.data[(54:65),2])
holder[12,3]<-mean(fig.data[(54:65),3])
holder[12,4]<-fig.data[54,4]
holder[13,1]<-mean(fig.data[(66:78),1]) #1981
holder[13,2]<-mean(fig.data[(66:78),2])
holder[13,3]<-mean(fig.data[(66:78),3])
holder[13,4]<-fig.data[66,4]
holder[14,1]<-mean(fig.data[(79:91),1]) #1997
holder[14,2]<-mean(fig.data[(79:91),2])
holder[14,3]<-mean(fig.data[(79:91),3])
holder[14,4]<-fig.data[79,4]

figData<-holder
colnames(figData)<-colnames(fig.data)
figData

## the authors omit years in the figure for which they have less data. so we must modify figData to include only the data they use

lessfig<-matrix(NA, nrow=7, ncol=4)
colnames(lessfig)<-colnames(figData)
lessfig[1,]<-figData[2,]
lessfig[2,]<-figData[4,]
lessfig[3,]<-figData[7,]
lessfig[4,]<-figData[9,]
lessfig[(5:7),]<-figData[(12:14),]
lessfig

## FIGURE 1
plot(x=lessfig[,4], y=lessfig[,1], ylim=c(0,100), xlim=c(1881,1997), xlab="years", ylab="Percentage of Total Bill Introductions", main="Figure 1. Variation across Time in Types of Bills Introduced", type="b", pch=18, lty=2)
	lines(x=lessfig[,4], y=lessfig[,2], lty=4)
	lines(x=lessfig[,4], y=lessfig[,3], lty=1)
	points(x=lessfig[,4], y=lessfig[,2], pch=17)
	points(x=lessfig[,4], y=lessfig[,3], pch=15)
	legend(1948,105.2, c("Statewide Bills", "District Bills", "General Local Govt Bills"), 
	lty = c(2, 4, 1), pch = c(18, 17, 15), merge = F, )

## Fig 1 with more data:
plot(x=figData[,4], y=figData[,1], ylim=c(0,100), xlim=c(1881,1997), xlab="years", ylab="Percentage of Total Bill Introductions", main="Figure 1 Expanded", type="b", pch=18, lty=2)
	lines(x=figData[,4], y=figData[,2], lty=4)
	lines(x=figData[,4], y=figData[,3], lty=1)
	points(x=figData[,4], y=figData[,2], pch=17)
	points(x=figData[,4], y=figData[,3], pch=15)
	legend(1948,105.2, c("Statewide Bills", "District Bills", "General Local Govt Bills"), 
	lty = c(2, 4, 1), pch = c(18, 17, 15), merge = F, ) 


## FIGURE 3
plot(x=data$majority_margin, y=data$special_pct, main="Figure 3. Effect of Party Competition on District Legislation", ylab="District Bills as Percentage of Total Introductions", xlab="Majority Party Margin (% of seats)", xlim=c(0,100), ylim=c(0,100), cex=.5, pch=16)

